######################### DATA PROCESSING ############################
######################################################################
#Name of code file: data_processing.R
#Purpose: prepare network diversity and tolerance data for analysis 
#Data In: network_tolerance_data.csv
#Data Out: network_tolerance_processed_data.csv
######################################################################

#Load Packages
library(readr)

# Set Working Directory
# NOTE(review): hard-coded, user-specific project root; every relative path
# used for reading and writing in this script resolves against it.
setwd("~/Dropbox/egypt_tolerance_wp_replication/")

# Read in Data
# read_csv() comes from readr (loaded above) and returns a tibble.
data <- readr::read_csv("data/network_tolerance_data.csv")

#Make Network Diversity Variables

# Elite Network Diversity
# elite_total is the sum of the Islamist and secular elite friend counts; the
# two prop_* columns are each camp's share of that sum. The diversity score is
# 1 - |islamist share - secular share|: 1 when the shares are equal, 0 when
# all counted elite friends fall in a single camp.
data$elite_total <- with(
  data,
  may15islamist_elite_friends + may15secular_elite_friends
)
data$prop_sec_elite <- with(data, may15secular_elite_friends / elite_total)
data$prop_islamist_elite <- with(
  data,
  may15islamist_elite_friends / elite_total
)
data$elite_diversity <- with(
  data,
  1 - abs(prop_islamist_elite - prop_sec_elite)
)
# Deal with NAs: any missing score (including the NaN produced by 0 / 0 when
# elite_total is 0) is set to 1.
data$elite_diversity[is.na(data$elite_diversity)] <- 1

# Nonelite Network Diversity
# Same construction as the elite score, using the two
# may15nonelite_friends_60pct_* count columns: nonelite_total is their sum,
# the prop_* columns are each camp's share, and the score is
# 1 - |islamist share - secular share|.
data$nonelite_total <- with(
  data,
  may15nonelite_friends_60pct_islamist + may15nonelite_friends_60pct_secular
)
data$prop_sec_nonelite <- with(
  data,
  may15nonelite_friends_60pct_secular / nonelite_total
)
data$prop_islamist_nonelite <- with(
  data,
  may15nonelite_friends_60pct_islamist / nonelite_total
)
data$nonelite_diversity <- with(
  data,
  1 - abs(prop_islamist_nonelite - prop_sec_nonelite)
)
# Deal with NAs: any missing score (including the NaN produced by 0 / 0 when
# nonelite_total is 0) is set to 1.
data$nonelite_diversity[is.na(data$nonelite_diversity)] <- 1

# Network Diversity with Moderate Friends
# moderate_non_elite: share of all friends that are counted in neither the
# 60pct_islamist nor the 60pct_secular non-elite column.
data$moderate_non_elite <- with(
  data,
  (may15total_num_friends -
    may15nonelite_friends_60pct_islamist -
    may15nonelite_friends_60pct_secular) / may15total_num_friends
)
# Average of the raw non-elite diversity score and the moderate share.
# NOTE(review): this is recomputed from the prop_* columns, so rows whose
# proportions are NaN stay NaN here, whereas nonelite_diversity has its
# missing values replaced by 1 -- confirm this difference is intended.
data$nonelite_diversity_mod <- with(
  data,
  (1 - abs(prop_sec_nonelite - prop_islamist_nonelite) + moderate_non_elite) / 2
)

# Tolerance Proportion Vars
# Each column is that category's share of tolerant_total + intolerant_total;
# rows where both totals are 0 evaluate to NaN (0 / 0).
data$prop_tolerant <- with(
  data,
  tolerant_total / (tolerant_total + intolerant_total)
)
data$prop_intolerant <- with(
  data,
  intolerant_total / (tolerant_total + intolerant_total)
)

# Create Logged Values of Vars
# Natural log of (value + 1), so that a value of 0 maps to 0 rather than -Inf.
data[["log_tolerant"]] <- log(data[["tolerant_total"]] + 1)
data[["log_intolerant"]] <- log(data[["intolerant_total"]] + 1)
data[["log_elite"]] <- log(data[["elite_total"]] + 1)
data[["log_total_friends"]] <- log(data[["may15total_num_friends"]] + 1)
data[["log_relevant_tweets"]] <- log(data[["relevant_total"]] + 1)
# NOTE(review): no +1 offset here, so a statuses_count of 0 yields -Inf --
# confirm that every account in the input has at least one status.
data[["log_status_count"]] <- log(data[["statuses_count"]])

# Time on Twitter
# Parse created_at (e.g. "Mon Jan 02 15:04:05 +0000 2012") into a calendar
# date. NOTE(review): %a and %b match day/month abbreviations in the current
# locale, so parsing presumably yields NA under a non-English locale --
# confirm the locale used for replication.
data$date_start <- as.Date(
  as.POSIXct(data$created_at, format = "%a %b %d %H:%M:%S %z %Y")
)
# Fixed reference date against which account age is measured.
data$date_now <- as.Date("2016-11-01")
# Account age in days as a plain number, and its natural log.
data$twitter_time <- as.numeric(
  difftime(data$date_now, data$date_start, units = "days")
)
data$log_time <- log(data$twitter_time)

# Secular/Moderate/Islamist
# Each flag is a 0/1 numeric column built in a single vectorised step.
# `cond %in% TRUE` is TRUE only where cond is TRUE, so a comparison that
# evaluates to NA (e.g. a 0 / 0 share when elite_total is 0, or a missing
# count) yields 0. This gives the same values as assigning 1 where the
# condition holds and then filling the remaining NAs with 0, without
# subset-assigning into columns that do not exist yet.

# islamist / secular: strictly more than 60% of elite friends in that camp.
data$islamist <- as.numeric(
  ((data$may15islamist_elite_friends / data$elite_total) > .6) %in% TRUE
)
data$secular <- as.numeric(
  ((data$may15secular_elite_friends / data$elite_total) > .6) %in% TRUE
)

# apolitical: zero elite friends in both camps.
data$apolitical <- as.numeric(
  (data$may15islamist_elite_friends == 0 &
    data$may15secular_elite_friends == 0) %in% TRUE
)

# moderate: neither islamist nor secular. This includes apolitical users,
# whose shares are 0 / 0 and therefore never exceed the threshold.
data$moderate <- as.numeric(
  (data$islamist == 0 & data$secular == 0) %in% TRUE
)
# true_moderate: moderate users who have at least one elite friend.
data$true_moderate <- as.numeric(
  (data$moderate == 1 & data$apolitical == 0) %in% TRUE
)

# Write CSV
# Saves the input columns plus every derived column created above; the path
# is relative to the working directory set at the top of the script.
readr::write_csv(data, "data/network_tolerance_processed_data.csv")





